library(dplyr)
library(data.table)
library(stringr)

#' Collapse a three-part branching survey item into a single 1-7 code
#'
#' Codes are assigned in order, so a later rule overrides an earlier one when
#' several match the same respondent:
#'   1 = `first` is "Very close",      2 = `first` is "Somewhat close",
#'   3 = `second` equals `pro_tech_text`,
#'   4 = `second` equals `in_between_text`,
#'   5 = `second` equals `anti_tech_text`,
#'   6 = `third` is "Somewhat close",  7 = `third` is "Very close".
#'
#' @param first,second,third Parallel vectors of raw answers, one element per
#'   respondent.
#' @param pro_tech_text,anti_tech_text,in_between_text Exact answer texts of
#'   `second` that map to codes 3, 5 and 4 respectively.
#' @return A numeric vector with one element per respondent; `NA` where no
#'   rule matched (including missing answers).
recode_to_7 <- function(first, second, third, pro_tech_text, anti_tech_text, in_between_text){
  # Size the result from the inputs themselves. The previous version used
  # length(df), which silently resolved to a global `df` (or to the function
  # stats::df) because `df` is not an argument of this function.
  n <- max(length(first), length(second), length(third))
  out <- rep(NA_real_, n)

  # which() drops NA comparisons, so missing answers simply stay NA.
  out[which(first == 'Very close')] <- 1
  out[which(first == 'Somewhat close')] <- 2

  out[which(second == pro_tech_text)] <- 3
  out[which(second == in_between_text)] <- 4
  out[which(second == anti_tech_text)] <- 5

  out[which(third == "Somewhat close")] <- 6
  out[which(third == "Very close")] <- 7
  out
}

#' Import and score the wave-2 text survey
#'
#' Reads `Data/text-wave-2.csv` with every column as character, orders the
#' responses by `EndDate`, keeps the rows where `Q1.2` is
#' "I agree to participate", and derives the outcome measures.
#'
#' @return A data.frame with one row per `PID` (the first response in
#'   `EndDate` order) and the columns `EndDate`, `break_tech`,
#'   `break_placebo`, `fav_tech`, `fav_plac`, `understand_tech`, `pass_att1`,
#'   `scale`, `censorship`, `privacy`, `congress`, `influence`, `fav_tech.1`,
#'   `PID`, `med_pref` and `pc`. `pc` is the standardized first principal
#'   component of the outcome items and is `NA` when any outcome item of the
#'   retained response is missing.
import_text_wave_2 <- function(){
  df <- fread('Data/text-wave-2.csv', colClasses='character')
  df <- df[order(as.Date(df$EndDate)),]
  opt_in <- df$Q1.2 == 'I agree to participate'
  df <- df[opt_in,]

  # Columns of `df` whose name matches `pattern`.
  # NOTE(review): the patterns below are unanchored regular expressions and
  # '.' is a wildcard, so e.g. 'Q10.1' would also match a column named
  # 'Q10.10' -- confirm against the column names of the export.
  cols_matching <- function(pattern) {
    df[, grepl(pattern, colnames(df)), with = FALSE]
  }

  # TRUE when Q3.3_1 is 'Every day', Q3.3_5 is 'Never', and exactly two of
  # the Q3.3 columns are non-empty.
  pass_att1 <- (df$Q3.3_1 == 'Every day' & df$Q3.3_5 == 'Never') &
    rowSums(cols_matching('Q3.3') != '') == 2

  # Number of non-empty answers among Q6.9 columns 1-5 and 6-9.
  break_up <- cols_matching('Q6.9')
  break_tech <- rowSums(break_up[,1:5] != '')
  break_placebo <- rowSums(break_up[,6:9] != '')

  # Favorability items on a 1-7 scale; unrecognized answers become NA.
  fav <- apply(cols_matching('Q10.1'), 2, function(x){
    recode(x,
           'Very favorable'=7,
           'Favorable'=6,
           'Somewhat favorable'=5,
           'Neither favorable nor unfavorable'=4,
           'Somewhat unfavorable'=3,
           'Unfavorable'=2,
           'Very unfavorable'=1,
           .default = NA_real_)
  })
  fav_tech <- rowMeans(fav[,1:5])
  fav_plac <- rowMeans(fav[,6:ncol(fav)])

  med_pref <- str_extract(df$Q3.5, 'MSNBC|Fox News|Food Network')

  scale <- recode_to_7(first=df$Q6.3, second=df$Q6.7, third=df$Q6.5,
                       pro_tech_text='The scale and efficiency of large tech companies like Apple, Amazon, Facebook, and Google benefits consumers more than it hurts them',
                       anti_tech_text='Large tech companies like Apple, Amazon, Facebook, and Google use their size to gain an unfair advantage over competitors and disadvantage consumers',
                       in_between_text='Both come equally close to my view')

  censorship <- recode_to_7(first=df$Q7.5, second=df$Q7.7, third=df$Q7.3,
                            pro_tech_text='Social networks like Facebook and Twitter should allow their users to freely express their views, even if it means allowing false, offensive, or harmful content to circulate',
                            anti_tech_text='Social networks like Facebook and Twitter should do more to remove false, offensive, or harmful content from their platforms',
                            in_between_text='Truly unsure')

  privacy <- recode_to_7(first=df$Q8.3, second=df$Q8.7, third=df$Q8.5,
                         pro_tech_text='Big tech companies do a good job of keeping their users’ information secure',
                         anti_tech_text="Big tech companies do not do a good job of keeping their users’ information secure",
                         in_between_text='Truly unsure')

  congress <- recode_to_7(first=df$Q8.13, second=df$Q8.15, third=df$Q8.11,
                          pro_tech_text="Congress should allow large tech companies to store and use data on their users as they see fit",
                          anti_tech_text="Congress should more actively regulate how large tech companies gather and store data on their users",
                          in_between_text='Truly unsure')

  influence <- recode_to_7(first=df$Q9.4, second=df$Q9.5, third=df$Q9.3,
                           pro_tech_text="The influence of big tech companies on political life in America is often exaggerated",
                           anti_tech_text="Big tech companies exert too much influence over the political life in America" ,
                           in_between_text='Truly unsure')

  understand_tech <- recode(df$Q4.2,
                            'Very well'=4,
                            'Fairly well'=3,
                            'Not very well'=2,
                            'Not at all'=1,
                            .default = NA_real_)

  outcomes <- data.frame(scale, censorship, privacy, congress, influence, fav_tech, break_tech)
  # Additive index of the same items, each divided by its maximum and all but
  # censorship and fav_tech reversed. It is used only to orient the sign of
  # the principal component below.
  outcomes_index <- rowMeans(data.frame(1-(scale/7), censorship/7, 1-(privacy/7),
                                        1-(congress/7), 1-(influence/7),
                                        fav_tech/7, 1-(break_tech/5)))

  # First principal component of the complete responses, scaled to unit SD
  # and flipped if needed so it correlates positively with the index.
  complete <- complete.cases(outcomes)
  pc_complete <- -prcomp(outcomes[complete,], retx=TRUE)$x[,1]
  pc_complete <- pc_complete/sd(pc_complete)
  if(cor(pc_complete, outcomes_index[complete]) < 0){
    pc_complete <- -pc_complete
  }
  # Keep the score aligned with the response it was computed from. Joining it
  # back by PID after de-duplicating each table separately could attach the
  # score of a later complete response to an earlier incomplete one.
  pc <- rep(NA_real_, nrow(df))
  pc[complete] <- pc_complete

  # fav_tech.1 reproduces the duplicate column earlier versions of this
  # function returned (fav_tech was listed twice); kept so the shape of the
  # result is unchanged.
  to_return <- data.frame(EndDate=df$EndDate, break_tech, break_placebo, fav_tech, fav_plac,
                          understand_tech, pass_att1, scale, censorship, privacy, congress, influence,
                          fav_tech.1=fav_tech,
                          PID=df$PID, med_pref=med_pref)
  to_return$pc <- pc
  # Rows are in EndDate order, so this keeps each PID's first response.
  to_return <- to_return[!duplicated(to_return$PID),]
  rownames(to_return) <- NULL
  to_return
}
